library(scales)
library(beepr)
library(tableone)
library(dplyr)
library(rbounds)
library(causalsens)
library(Matching)
labs.csv <- read.csv('/Users/production/Dropbox/Code/R/OplaconUSA/key/labs.csv', stringsAsFactors=F)
labs <- function (x) labs.csv[labs.csv$var==x,]$label[1]

# PARAMETERS
p <- list(pc_id_col='pid')
p$run <- 'manual'
p$lc <- 'f' # Land cover whose change should be avoided ('f' for forest, 'u' for undeveloped)
p$outcome <- 'p_lcc_ann'
p$nearby <- F # Are we estimating the impact of nearby protection?
p$rep <- 1 # repetitions (only necessary with sampling or matching without replacement)
p$tr.sample <- 1 # sampling ratio for treatment units
p$ct.sample <- 1 # sampling ratio for control units - to reduce size of control sample, as Matching doesn't scale well (O(N^2))
p$tr.year.min <- 1985 # properties treated earlier than this year will not be considered treatment units
p$tr.year.max <- 2006 # properties treated after this year will not be considered treatment units
p$year.offset <- 3 # how many years after protection do we want to start observing forest loss?
p$p.prot.protected.min <- 80 # minimum percentage of parcel area that needs to be recently protected to count as "treated"
p$p.prot.unprotected.max <- 20 # maximum ratio of parcel area that can be protected to count as "untreated"
p$tr.continuous <- T # should the treatment variable be continuous in the regression?
p$weighted <- T # will observations be weighted by area (during matching and post-matching regressions/estimates)?
p$replace <- T # is matching with replacement? (Note: WITH replacement is not yet fully implemented - requires removing matched controls between year-wise runs)
p$cal <- 1 # calipers
p$m <- 1 # numbers of matches
p$cov <- c('slope', 'travel_asinh', 'ha_log', 'hh_inc_med_bg_1990', 'p_wet', 'pop_dens_bg_1990_asinh', 'coast_2500','river_lake_frontage_asinh') # general covariates #coast_2500
p$cov_yr <- c('p_lc_start', 'p_nb_prot') # covariates that will be generated for each year
p$prot_buffer <- 200 # radius of nearby protection buffer
p$exclude_buffer <- F # (impact analysis only) do we exclude from the control pool units that are within buffer of new protection?
p$v.other <- c('e_orgtype','f_orgtype', 'ha')
p$min.lake_ha <- 1 # area of lakes whose frontage would be included
p$include_any_fee <- T # should any fee ownership with public and NGO owners be included? (or only those marked as "fee_prot")
p$beep <- T

# Overwrite parameters with entries from wrapper, if provided
if (exists('p_in')) {
  print(paste('Wrapper Run:', p_in['run']))
  for (p_v in names(p_in)) p[p_v] <- p_in[p_v]
  p$beep <- F
}

# Which additional subgroups should the impact analysis be run for?
p$groups <- c(c('tr_year'), if (!p$nearby) c('protmech', 'tr_type'))


#########


# Load dataset
if(!exists('d')) {
  d <- read.csv('Data/GIS/USA/GDB/- all/pc/pc_MA_ConsLett.csv')
  
  # Set NAs in selected columns to zero
  for (v in c('p_wet', 'p_e', 'p_f', 'e_p', 'f_p', 'p_prot','coast_2500', 'river_frontage', 'lake_frontage', 'lake_ha')) {
    d[is.na(d[v]),v] <- 0
  }

  # CHECK: is d$lake_front == d$lake_frontage!?!?
  
  # Compute variables that are independent of parametrization (need to be done only once)
  d$river_lake_frontage <- d$river_frontage + ifelse(d$lake_ha > p$min.lake_ha, d$lake_front, 0)
  d$ha_log <- log(d$ha)
  d$travel_asinh <- asinh(d$travel)
  for (v in colnames(d)) if (substr(v,0,8) == 'pop_dens') d[paste(v,'_asinh',sep='')] = asinh(d[v])
  d$river_lake_frontage_asinh <- asinh(d$river_lake_frontage)
  
  # Protection mechanism: easement or fee?
  # Decision is based on largest individual area overlap, with ties broken by easements
  d$protmech <- ifelse(d$e_p >= d$f_p & d$e_p > 0, 'e', ifelse(d$f_p > 0 & (if(p$include_any_fee) T else d$f_fee_prot==1), 'f', NA))
  
  # Protection type + organization type
  d$tr_type <- ifelse(d$protmech == 'e' & d$e_orgtype != '', paste('e_', d$e_orgtype, sep=''),
                      ifelse(d$protmech == 'f' & d$f_orgtype != '', paste('f_', d$f_orgtype, sep=''), '-'))
  # # Encumbered sales
  # d$encumbered_sale = (substr(d$match_quality,0,1) == '1') & d$ls_year > d$e_year
  # d$encumbered_sale[is.na(d$encumbered_sale)] <- FALSE
  # 
  # # Protection type + organization type + encumbered sale
  # d$tr_type_sold <- ifelse(d$encumbered_sale, paste(d$tr_type,'_sold',sep=''), d$tr_type)
}


# TREATMENT & OUTCOME
pb <- p$prot_buffer # shortcut
if(!p$nearby) {
  # Treatment year
  d$tr_year <- ifelse(d$protmech=='e', d$e_year, ifelse(d$protmech=='f', d$f_year, NA))
  d$p_prot_protmech <- ifelse(d$protmech=='e', d$e_p, ifelse(d$protmech=='f', d$f_p, NA))
  # Treatment group: properties that have minimum % protection, and whose protection mechanism has a treatment year within time period
  d$tr <- ifelse(d$p_prot_protmech > p$p.prot.protected.min & !is.na(d$tr_year) & d$tr_year >= p$tr.year.min & d$tr_year <= p$tr.year.max, 1, 0)
  # Treatment variable: continuous or not
  d$tr_v <- if(p$tr.continuous) d$p_prot_protmech/100 else d$tr
  # Control pool: properties that were never protected
  d$ct <- ifelse(d$p_prot < p$p.prot.unprotected.max, 1, 0)
  # Control pool: do we exclude properties within a buffer from changes in protection?
  if(p$exclude_buffer) {
    d['p_nb_prot_diff_sum_any'] <- d[paste('p_prot_2012_', pb, sep='')] - d[paste('p_prot_', p$tr.year.min, '_', pb, sep='')]
    d[d['p_nb_prot_diff_sum_any']>0,'ct'] <- 0
  }
} else {
  # Compute yearly increase in nearby protection
  for (yr in (p$tr.year.min):2012) d[paste('p_nb_prot_', yr, '_diff', sep='')] <- d[paste('p_prot_', yr, '_', pb, sep='')] - d[paste('p_prot_', yr-1, '_', pb, sep='')]
  # Sum of all increases in nearby protection within time period (target variable needed to be referenced using the string label; otherwise it is weirdly named after first input variable)
  d['p_nb_prot_diff_sum'] <- d[paste('p_prot_', p$tr.year.max, '_', pb, sep='')] - d[paste('p_prot_', p$tr.year.min, '_', pb, sep='')]
  # Sum of all increases in nearby protection until end of deforestation data
  d['p_nb_prot_diff_sum_any'] <- d[paste('p_prot_2012_', pb, sep='')] - d[paste('p_prot_', p$tr.year.min, '_', pb, sep='')]
  # Find year of largest increase
  d$p_nb_prot_diff_maxyear <- max.col(d[paste('p_nb_prot_', p$tr.year.min:p$tr.year.max, '_diff',sep='')], ties.method='first') + p$tr.year.min - 1
  d[d$p_nb_prot_diff_sum==0, 'p_nb_prot_diff_maxyear'] <- 0 # Needs to be set to 0 after above line
  
  # Treatment group: unprotected parcels with any increase in nearby protection within treatment time period
  d$tr <- ifelse(d$p_prot < p$p.prot.unprotected.max & d$p_nb_prot_diff_sum > 0, 1, 0)
  # Treatment year: year of largest increase
  d$tr_year <- d$p_nb_prot_diff_maxyear
  # Treatment variable: continuous or not
  d$tr_v <- if (p$tr.continuous) d$p_nb_prot_diff_sum/100 else d$tr
  # Control pool: unprotected parcels without any increase in nearby protection within entire time period
  d$ct <- ifelse(d$p_prot < p$p.prot.unprotected.max & d$p_nb_prot_diff_sum_any == 0, 1, 0)
}


# Covariates
d$has_cov <- rowSums(!is.na(d[p$cov])) == length(p$cov)


# Prepare Folder
rf <- paste("Data/GIS/USA/GDB/- all/stats/MA/", if(!is.null(p$run) & nchar(p$run)>0) p$run else as.integer(Sys.time()),'/',sep="")
f <- function(...) { paste(rf, ..., sep='') }
dir.create(f(''))
sink(f('parameters.txt'))
print(p)
sink()

# Extract full sample of treatment and control units, keeping only necessary columns
cols.keep <- c(p$pc_id_col, 'tr','tr_v', p$cov, p$groups, p$v.other, paste('p_',p$lc,'_',seq(1985,2012),sep=''), paste('p_prot_',seq(1984,2012),'_',pb,sep=''))
d.tr.full <- d[d$has_cov & d$tr == 1, cols.keep]
d.ct.full <- d[d$has_cov & d$ct == 1, cols.keep]

# Pre-matching balance indicators (with 1985 land cover and 1984 nearby protection)
smd.bm <- data.frame(t(ExtractSmd(CreateTableOne(vars = c(p$cov,paste('p_', p$lc, '_1985',sep=''),paste('p_prot_1984_',pb,sep='')), strata = "tr", data = rbind(d.tr.full, d.ct.full), test = FALSE))))
colnames(smd.bm) <- paste('smd.',c(p$cov,p$cov_yr),'.bm',sep='')
#smd.bm[is.na(smd.bm)] <- 0

# Repetitions
out.m <- NULL # container for matching quality indicators
out.g <- NULL # container for impact estimates by group
out.q <- NULL # container for quantile breakpoints for each group
for (i in 1:p$rep) {
  print(i)

  # Create full treatment and control samples with random order (in case we do matching without replacement)
  d.tr.sample <- d.tr.full[sample(nrow(d.tr.full), round(nrow(d.tr.full)*p$tr.sample)),]
  d.ct.sample <- d.ct.full[sample(nrow(d.ct.full), round(nrow(d.ct.full)*p$ct.sample)),]
  
  d.m <- NULL # container for full matched sample
  o.m.yr.all <- NULL # container for matching quality indicators
  for (yr in sort(unique(d.tr.full$tr_year))) {
    print(yr)
    
    # Build year-specific dataset
    p_lc_start_col = paste('p_',p$lc,'_',max(1985,(yr + p$year.offset)),sep='')
    p_lc_end_col = paste('p_',p$lc,'_2012',sep='')
    p_nb_prot_col = paste('p_prot_',(yr-1),'_',pb,sep='')
    cols <- c(p$pc_id_col, 'tr', 'tr_v', p$cov, p_lc_start_col, p_lc_end_col, p_nb_prot_col, p$groups, p$v.other)
    d.tr.yr <- d.tr.sample[!is.na(d.tr.sample$tr_year) & (d.tr.sample$tr_year == yr),cols]
    d.ct.yr <- d.ct.sample[,cols]
    d.bm.yr <- rbind(d.tr.yr,d.ct.yr)
    # Rename year-specific columns to standard name
    colnames(d.bm.yr)[which(colnames(d.bm.yr)==p_lc_start_col)] <- 'p_lc_start'
    colnames(d.bm.yr)[which(colnames(d.bm.yr)==p_lc_end_col)] <- 'p_lc_end'
    colnames(d.bm.yr)[which(colnames(d.bm.yr)==p_nb_prot_col)] <- 'p_nb_prot'
    # Compute average annual deforestation within time period
    d.bm.yr$p_lcc_ann <- (d.bm.yr$p_lc_end - d.bm.yr$p_lc_start)/(2012 - yr - p$year.offset)
    # d.bm$p_nb_prot_bool <- ifelse(d.bm$p_nb_prot > 0,1,0)
    d.bm.yr$year_ref <- yr
    
    # Match
    m <- Match(Tr=d.bm.yr$tr, X=d.bm.yr[,c(p$cov,p$cov_yr)], M=p$m, cal=p$cal, ties=F, replace=p$replace, weights=if(p$weighted) d.bm.yr$ha else NULL, version='fast')
    
    # Extract matched dataset
    d.m.yr <- rbind(d.bm.yr[m$index.treated,],d.bm.yr[m$index.control,])
    
    # Create matching quality indicators 
    # smd.bm <- data.frame(t(ExtractSmd(CreateTableOne(vars = c(p$cov,p$cov_yr), strata = "tr", data = d.bm.yr, test = FALSE))))
    # colnames(smd.bm) <- paste('smd.',c(p$cov,p$cov_yr),'.bm',sep='')
    # smd.bm[is.na(smd.bm)] <- 0
    # o.m.yr$smd.bm <- round(rowMeans(abs(smd.bm), na.rm=T),3)[1]
    # smd.m <- data.frame(t(ExtractSmd(CreateTableOne(vars = c(p$cov,p$cov_yr), strata = "tr", data = d.m.yr, test = FALSE))))
    # colnames(smd.m) <- paste('smd.', c(p$cov,p$cov_yr),'.m',sep='')
    # smd.m[is.na(smd.m)] <- 0
    # o.m.yr$smd.m <- round(rowMeans(abs(smd.m), na.rm=T),3)[1]
    # o.m.yr <- cbind(o.m.yr, smd.bm, smd.m)
    # o.m.yr.all <- rbind(o.m.yr.all, o.m.yr)
    
    # Build full matched dataset
    d.m <- rbind(d.m, d.m.yr)
    
    if (p$replace==F) {
      d.ct.sample = d.ct.sample[!(d.ct.sample$pid %in% d.m.yr[d.m.yr$tr==0,'pid']),]
    }
  }
  d.tr.m <- d.m[d.m$tr==1,]
  d.ct.m <- d.m[d.m$tr==0,]
  
  # Post-matching balance indicators
  o.m <- data.frame(rep=i, n.tr.bm=nrow(d.tr.sample),    n.ct.bm=nrow(d.ct.sample), 
                           n.tr.m=nrow(d.tr.m),          n.ct.m=nrow(d.ct.m),
                           ha.tr.bm=sum(d.tr.sample$ha), ha.ct.bm=sum(d.ct.sample$ha),
                           ha.tr.m=sum(d.tr.m$ha),       ha.ct.m=sum(d.ct.m$ha))
  smd.m <- data.frame(t(ExtractSmd(CreateTableOne(vars = c(p$cov,p$cov_yr), strata = "tr", data = d.m, test = FALSE))))
  colnames(smd.m) <- paste('smd.', c(p$cov,p$cov_yr),'.m',sep='')
  smd.m[is.na(smd.m)] <- 0
  o.m$smd.bm <- round(rowMeans(abs(smd.bm), na.rm=T),4)
  o.m$smd.m <- round(rowMeans(abs(smd.m), na.rm=T),4)
  o.m <- cbind(o.m, smd.bm, smd.m)
  out.m <- rbind(out.m,o.m)

  # Subgroups analysis
  for (vg in c('full', p$cov, p$cov_yr, p$groups)) {
    
    # Define groups (split by quantile or categories)
    if (vg == 'full') {
      d.tr.group <- rep('all',nrow(d.tr.m))
    } 
    else if (is.numeric(d.tr.m[,vg])) {
      # d.tr.group <- ifelse(d.tr.m[,vg] > median(d.tr.m[,vg]),'high','low')
      d.tr.group <- ifelse(d.tr.m[,vg] > quantile(d.tr.m[,vg],0.66),'high',
                           ifelse(d.tr.m[,vg] > quantile(d.tr.m[,vg],0.33),'middle','low'))
      out.q <- rbind(out.q,data.frame(rep=i,vg=vg,q0=min(d.tr.m[,vg], na.rm=T),
                                      q33=quantile(d.tr.m[,vg],0.33),
                                      q67=quantile(d.tr.m[,vg],0.66),
                                      q100=max(d.tr.m[,vg],na.rm=T)))
    } else {
      d.tr.group <- d.tr.m[,vg]
    }
    
    # Subgroup dataset
    for (g in unique(d.tr.group)) {
      ix = (d.tr.group == g)
      # Exclude cases in which LR is overspecified
      if (is.na(g) || sum(ix,na.rm=T) < length(c(p$cov,p$cov_yr)) + 2) next 
      o.g <- data.frame(rep=i,vg=vg,g=g,n.m=sum(ix),ha=sum(d.tr.m[ix,'ha']))
      d.tr.mg <- d.tr.m[ix,]
      d.ct.mg <- d.ct.m[ix,]
      d.mg <- rbind(d.tr.mg,d.ct.mg)
      
      lr <- lm(as.formula(paste('p_lcc_ann ~ tr_v + ', paste(c(p$cov, p$cov_yr), collapse='+'))),
               data=d.mg, weights=if(p$weighted) d.mg$ha else NULL)
      lrc <- summary(lr)$coefficients[2,]
      
      # Averages of treated and control
      w = if(p$weighted) d.tr.mg$ha else rep(1,nrow(d.tr.mg))
      o.g$mean.tr <- weighted.mean(d.tr.mg$p_lcc_ann, w=w)
      o.g$sd.tr <- (sum(w*(d.tr.mg$p_lcc_ann - o.g$mean.tr)^2)/sum(w))^0.5
      w.ct <- if(p$weighted) d.ct.mg$ha else rep(1,nrow(d.ct.mg))
      o.g$mean.ct <- weighted.mean(d.ct.mg$p_lcc_ann, w=w.ct)
      o.g$sd.ct <- (sum(w.ct*(d.ct.mg$p_lcc_ann - o.g$mean.ct)^2/sum(w.ct)))^0.5
      options(warn=-1)
      
      # Average predicted values
      d.tr.mg$y.tr.hat <- predict(lr, d.tr.mg)
      o.g$mean.tr.hat <- weighted.mean(d.tr.mg$y.tr.hat, w=w)
      d.tr.mg$y.cf.hat <- predict(lr, cbind(d.tr.mg[,!names(d.tr.mg) %in% c('tr_v')],data.frame(tr_v=0)))
      o.g$mean.tr.cf <- weighted.mean(d.tr.mg$y.cf.hat, w=w)
      options(warn=1)
      
      # Regression coefficients
      o.g$tr.coeff <- lrc[1]
      o.g$tr.coeff.lwr <- lrc[1] - 1.96*lrc[2]
      o.g$tr.coeff.upr <- lrc[1] + 1.96*lrc[2]
      o.g$se <- lrc[2]
      o.g$p <- lrc[4]
      o.g$att <- o.g$mean.tr.hat - o.g$mean.tr.cf
      o.g$att.lwr <- weighted.mean(o.g$tr.coeff.lwr * d.tr.mg$tr_v, w=w)
      o.g$att.upr <- weighted.mean(o.g$tr.coeff.upr * d.tr.mg$tr_v, w=w)
      o.g$mean.tr.cf.lwr <- o.g$mean.tr.hat - o.g$att.lwr
      o.g$mean.tr.cf.upr <- o.g$mean.tr.hat - o.g$att.upr
      o.g$perc.att <- -o.g$att/o.g$mean.tr.cf*100
      o.g$perc.att.lwr <- o.g$att.lwr/(o.g$att.lwr - o.g$mean.tr.hat)*100
      o.g$perc.att.upr <- o.g$att.upr/(o.g$att.upr - o.g$mean.tr.hat)*100
      
      out.g <- rbind(out.g,o.g)
    }
  }
}



#############
# AGGREGATION

write.csv(d.m,f('matched_sample.csv'),row.names=F)

write.csv(out.g, f('impacts_by_group_full.csv'), row.names=F)

out.g.a <- out.g %>% group_by(vg,g) %>% summarize(
  n.m = round(mean(n.m),1),
  ha = round(mean(ha),1),
  mean.tr = round(mean(mean.tr),5),
  sd.tr = round(mean(sd.tr),5),
  mean.ct = round(mean(mean.ct),5),
  sd.ct = round(mean(sd.ct),5),
  mean.tr.hat = round(mean(mean.tr.hat),5),
  mean.tr.cf = round(mean(mean.tr.cf),5),
  mean.tr.cf.lwr = round(mean(mean.tr.cf.lwr),5),
  mean.tr.cf.upr = round(mean(mean.tr.cf.upr),5),
  tr.coeff=round(mean(tr.coeff),5),
  tr.coeff.lwr=round(mean(tr.coeff.lwr),5),
  tr.coeff.upr=round(mean(tr.coeff.upr),5),
  se=round(mean(se),5),
  p=round(mean(p),5),
  att=round(mean(att),5),
  att.lwr=round(mean(att.lwr),5),
  att.upr=round(mean(att.upr),5),
  perc.att=round(mean(perc.att),3),
  perc.att.lwr=round(mean(perc.att.lwr),3),
  perc.att.upr=round(mean(perc.att.upr),3)
)
write.csv(out.g.a, f('impacts_by_group.csv'), row.names=F)

write.csv(out.m, f('matching_quality_full.csv'), row.names=F)

# Aggregate balance indicators (ugly, because I couldn't figure out how to pass columns names to dplyr summarize dynamically)
out.m.a <- out.m %>% summarize (
  n.tr.bm=mean(n.tr.bm), n.ct.bm=mean(n.ct.bm),
  n.tr.m=mean(n.tr.m), n.ct.m=mean(n.ct.m),
  ha.tr.bm=mean(ha.tr.bm), ha.ct.bm=mean(ha.ct.bm),
  ha.tr.m=mean(ha.tr.m), ha.ct.m=mean(ha.ct.m)
)
for (g in c('bm','m')) {
  for (vroot in c('',paste(c(p$cov,p$cov_yr),'.',sep=''))) {
    v <- paste('smd.', vroot, g, sep='')
    out.m.a[v] <- mean(unlist(out.m[v]))
  }
}
write.csv(out.m.a, f('matching_quality.csv'), row.names=F)

out.q.a <- out.q %>% group_by(vg) %>% summarize(
  q0=mean(q0), q33=mean(q33), q67=mean(q67), q100=mean(q100))
write.csv(out.q.a, f('cov_quantiles.csv'), row.names=F)


###########
# PLOTTING

# If you want to read another file:
#rf <- paste('Data/GIS/USA/GDB/- all/stats/MA/u nb 0.25s/',sep="")
#p$lc <- 'u'

out <- read.csv(f('impacts_by_group.csv'))

# Plotting
if(p$nearby) {
  xlim <- c(-0.005,0.16)
  ylim <- c(-0.005,0.16)
  if (p$lc=='f') {
    outcome_lab = 'forest loss'
  } else {
    outcome_lab = 'development'
  }
  width <- 1500
  height <- 1400
} else {
  if (p$lc=='f') {
    outcome_lab = 'forest loss'
    xlim <- c(-0.005, 0.18)
    ylim <- c(-0.005, 0.07)
  } else {
    outcome_lab = 'development'
    xlim <- c(-0.005, 0.075)
    ylim <- c(-0.005, 0.04)
  }
  width <- 1100
  height <- 800
}
#xlim <- range(-out$mean.ct,-out$mean.tr)*1.1



size_col = 'ha'
f_color = '#42b23d'
e_color = '#7cbeec'
min_n = 200*p$tr.sample # minimum numbers of observations in a group to display results
out['plot_size'] = out[, size_col]^0.5 * 6/max(out[size_col]^0.5)
out['plot_text_size'] = out[,size_col]^0.25*2/max(out[size_col]^0.25)
v_e_vs_f = c('protmech','tr_type','tr_type_sold')
for (vg in unique(out$vg)) {
  
  png(f('impacts_', vg, '.png'), width=width, height=height) 
  par(lwd=6,cex=3.5)

  og <- out[out$vg == vg & out$n.m > min_n & out$g != '-',]
  if (nrow(og)==0) next
  # xlim <- range(-og$mean.ct,-og$mean.tr, 0)*1.1
  
  plot(1, type='n', xlim=xlim, ylim=ylim, cex.lab=1.3,
       xlab=paste('%', outcome_lab, '/ yr (counterfactual)'), 
       ylab=paste('%', outcome_lab, '/ yr (obs.)'))
  abline(v=0,col='gray80')
  abline(h=0,col='gray80')
  abline(0,1,lty=3, col='gray80')
  
  points(-og$mean.tr.cf, -og$mean.tr, cex=pull(og['plot_size']), pch=20,
         col = alpha(if (vg %in% v_e_vs_f) ifelse(substr(og$g,0,1)=='e',e_color,f_color) else 'black',0.6))

  arrows(-og$mean.tr.cf.lwr, -og$mean.tr, -og$mean.tr.cf.upr, -og$mean.tr, angle=90, code=3, 
         col = if (vg %in% v_e_vs_f) ifelse(substr(og$g,0,1)=='e',e_color,f_color) else 'black',
         # lwd=par()$lwd*pull(og['plot_size'])/3, 
         length=0.25)
  
  # points(-og$mean.tr.cf, -og$mean.tr, pch=20)
  text(-og$mean.tr.cf, -og$mean.tr, if (vg %in% v_e_vs_f & vg != 'protmech') substr(og$g,3,7) else og$g, 
       cex = pull(og['plot_text_size'])/1.5, 
       pos = if (vg %in% v_e_vs_f) NULL else 3,
       offset=1)
  #title(if (vg=='full') 'Full Sample' else paste('Subgroups:',labs[[vg]]),cex.main=1.5)
  title(labs(vg),cex.main=1.5)
  
  dev.off()
}



# Balance plot
png(f('balance.png'), width=1500, height=1500)
par(mar=c(2,10,2,2.1), cex=4, lwd=2)
n <- length(c('',p$cov,p$cov_yr))
x.bm <- out.m.a[9:(8+n)]
x.m <- out.m.a[(9+n):(8+2*n)]
x <- t(matrix(c(unlist(x.m),unlist(x.bm)),c(n,2)))
y <- barplot(x, horiz=T, yaxt='n',beside=T)
axis(2,colMeans(y), sapply(colnames(x.m), function(x) substr(x,5,nchar(x)-3)), las=2)
dev.off()



# SENSITIVITY CHECKS 
# (with sample from last repetition)

# Rosenbaum Hack
rosenb <- psens(d.tr.mg$p_lcc_ann, d.ct.mg$p_lcc_ann, Gamma=10, GammaInc=0.01)$bounds
i_nosig <- which(rosenb$`Upper bound`>0.05)[1]
o.r <- data.frame(rb_gamma_sig = if (i_nosig>1) rosenb$Gamma[i_nosig-1] else NA,
                  rb_p_sig = if (i_nosig>1) rosenb$`Upper bound`[i_nosig-1] else NA,
                  rb_gamma_nosig = if(!is.na(i_nosig)) rosenb$Gamma[i_nosig] else NA,
                  rb_p_nosig = if(!is.na(i_nosig)) rosenb$`Upper bound`[i_nosig] else NA)
write.csv(o.r, f('rosenbaum.csv'), row.names=F)


# Blackwell Sensitivity Analysis
d.ps <- rbind(d.tr.sample, d.ct.sample)
colnames(d.ps)[which(colnames(d.ps)==paste('p_', p$lc, '_1985',sep=''))] <- 'p_lc_start'
colnames(d.ps)[which(colnames(d.ps)==paste('p_prot_1985_',pb,sep=''))] <- 'p_nb_prot'

lr2 <- lm(as.formula(paste('p_lcc_ann ~ tr + ', paste(c(p$cov, p$cov_yr), collapse='+'))),
          data=d.m)
ps <- glm(as.formula(paste('tr ~ ', paste(c(p$cov, p$cov_yr), collapse='+'))),
          data=d.ps, family=binomial())
cs <- causalsens(lr2, ps, as.formula(paste('~ ', paste(c(p$cov, p$cov_yr), collapse='+'))), 
                 one.sided.att, d.m, alpha= seq(-0.05, 0.05, by = 0.0025))
png(f('blackwell.png'), width=1600, height=1100) 
par(lwd=6,cex=3.5)
plot(cs, yaxt='n', xaxt='n', main=if(p$lc=='f') 'Impact: Forest Loss' else 'Impact: Development')
axis(1, lwd=par()$lwd)
axis(2, lwd=par()$lwd, las=2)
abline(h=summary(lr2)$coefficients[2,1], lty=3)
xy <- par('usr')
text(xy[2]-(xy[2]-xy[1])*0.22, summary(lr2)$coefficients[2,1], 'Average treatment effect', pos=3)
#abline(v=mean(cs$partial.r2), lty=3)
#text(mean(cs$partial.r2), 0.16, 'Mean variation explained', pos=4)
abline(v=median(cs$partial.r2), lty=3)
text(median(cs$partial.r2), xy[4]-(xy[4]-xy[3])*0.08, 'Median variation explained', pos=4)
dev.off()


# Export last matched sample
dex <- d[,'pid',drop=F]
dex['group'] <- ifelse(sapply(dex['pid'], function(x) x %in% d.tr.m$pid), 'tr_m',
                 ifelse(sapply(dex['pid'], function(x) x %in% d.ct.m$pid), 'ct_m',
                  ifelse(sapply(dex['pid'], function(x) x %in% d.tr.sample$pid), 'tr_d',
                   ifelse(sapply(dex['pid'], function(x) x %in% d.tr.full$pid), 'tr_x',
                    ifelse(sapply(dex['pid'], function(x) x %in% d.ct.sample$pid),'ct_d',
                     ifelse(sapply(dex['pid'], function(x) x %in% d.ct.full$pid),'ct_x','na'))))))
write.csv(dex,f('pc_matched.csv'),row.names=F)

if (p$beep) beep(1)



# ---------------------------------------------------------------------------------------------------------



#################
# DATA INSPECTION

# Function to plot land cover time trend
plot.trend <- function(d, id, lc='f') {
  dx = d[d[,p$pc_id_col]==id,]
  plot(seq(1988,2012),t(dx[,paste('p_',lc,'_',seq(1988,2012),sep='')]), type='l', ylim=c(0,100),
       xlab='Year',ylab=if(lc=='f') '% Forest' else '% Undeveloped', col='forestgreen',lwd=2,yaxt='n')
  axis(2,las=2)
  abline(v=dx$tr_year,lty=3,lwd=3,col='cornflowerblue')
  title(dx[,p$pc_id_col])
}

# # Individual Property Graphs
#plot.ftrend(d,'F_580749_3002764')


# Data Inspection: give me the properties with the biggest loss
if(F) {
  di <- d.m[d.m$tr == 1,]
  di$loss <- di$p_lcc_ann * di$ha
  di.top <- di[order(di$loss),c(p$pc_id_col,'tr_type','p_lcc_ann','loss','tr_year','year_ref')]
  head(di.top,n=30)
  write.csv(head(di.top,n=30),f('top_losers.csv'))

  test <- d.tr.m[d.tr.m$tr_type=='e_LOC',c('pid','p_lcc_ann','ha')]
  test$lcc_ha <- test$p_lcc_ann*test$ha
  head(test[order(test$lcc_ha),],2)
}

# Identify: properties with major forest loss around the year of protection
# (With few exceptions, these are locally protected easements: 
#  either golf courses or development set-asides (partial easements))
if(F) {
  d$prot_within_pontus <- d$is_prot & !is.na(d$tr_year) & (d$tr_year > 1988 + p$year.offset) & (d$tr_year < 2012 - p$year.offset)
  # Function to calculate forest loss around protection date. Because matrix converts dataframe to string array, there's a little too much "as.numeric" in here.
  p_loss_at_prot <- function(x) { (as.numeric(x[paste('p_f_', as.numeric(x['tr_year']) + yr_offset, sep='')]) - as.numeric(x[paste('p_f_', as.numeric(x['tr_year']) - yr_offset, sep='')]))}
  d$p_loss_at_prot <- ifelse(!d$prot_within_pontus, NA, apply(d, 1, p_loss_at_prot))
  d$loss_at_prot = !is.na(d$p_loss_at_prot) & (d$p_loss_at_prot < -0.2)
}
# losers = d[d$loss_at_prot,c('LOC_ID','loss_at_prot','p_loss_at_prot')]
# write.csv(losers, 'Data/GIS/USA/GDB/MA/- temp/protection_losers.csv')





# My attempt to make graphs of post-protection time trends
if(F) {

  delta <- 7
#  i <- d.tr.full$tr_year>=1988
  i <- d.tr.full$tr_year>=1988+delta & d.tr.full$tr_year<=2012-delta
  dt <- d.tr.full[i,c('tr_year','ha',paste('p_f_',1988:2012,sep =''))]
  dt[paste('p_f_',(1988-delta):1987,sep='')] <- NA
  dt[paste('p_f_',2013:(2012+delta),sep='')] <- NA
  
  dtv <- data.frame()
  for (i in 1:nrow(dt)) {
    val <- dt[i,c('ha',paste('p_f_',(dt[i,'tr_year']-delta):(dt[i,'tr_year']+delta),sep=''))]
    colnames(val) <- c('ha',paste('',(-delta):delta,sep=''))
    dtv <- rbind(dtv,val)
  }
  
  x <- (-delta):delta
  y <- apply(dtv[,2:length(dtv)],2,function(x) weighted.mean(x, dtv$ha, na.rm=T))#colMeans(dtv, na.rm=T)
  
  plot(1, type='n', xaxt='n',yaxt='n', xlim=range(x), ylim=range(y), ylab='% Forest Cover')
  lines(x,y, lwd=2, col='forestgreen')
  axis(1, at=x,labels=colnames(y))
  axis(2, las=2)
  abline(v=0, lty=3, lwd=2, col='blue')

}



